Correlation

bedtools multicov

### Linux command
abdir="/rds/general/user/la420/home/CUTnTAG/antibodies"
corrdir="${abdir}/correlation"
bamdir="${abdir}/bam/sorted_bam"
beddir="${corrdir}/ENCODE_peak_files"
ENCODE_bamdir="${corrdir}/ENCODE_peak_files/bam/sorted_bam/fixed"
old_ENCODE_bamdir="${corrdir}/ENCODE_peak_files/bam/sorted_bam"
outdir="${corrdir}/all_marks"
#mergedbeddir="${abdir}/all_peaks/merged"

mark="H3K27ac_ENCFF044JNJ"
#mark="H3K27me3_ENCFF126QYP"

bedtools multicov -bams \
$bamdir/Abcam-ab177178_rmDup.sorted.fixed2.bam \
$bamdir/Abcam-ab4729_rmDup.sorted.fixed2.bam \
$bamdir/ActiveMotif_rmDup.sorted.fixed2.bam \
$bamdir/Diagenode_100x_rmDup.sorted.fixed2.bam \
$bamdir/Diagenode_50x_rmDup.sorted.fixed2.bam \
$bamdir/H3K27me3_rmDup.sorted.fixed2.bam \
$old_ENCODE_bamdir/H3K27ac_ENCFF384ZZM_sorted.bam \
$old_ENCODE_bamdir/H3K27me3_ENCFF676ORH_sorted.bam \
$ENCODE_bamdir/H3K4me1_ENCFF196QGZ_sorted.fixed.bam \
$ENCODE_bamdir/H3K4me3_ENCFF915MJO_sorted.fixed.bam \
$ENCODE_bamdir/H3K9ac_ENCFF204DNG_sorted.fixed.bam \
$ENCODE_bamdir/H3K9me3_ENCFF805FLY_sorted.fixed.bam \
$ENCODE_bamdir/H3K36me3_ENCFF190AQC_sorted.fixed.bam \
$ENCODE_bamdir/DNase_ENCFF826DJP_sorted.fixed.bam \
-bed $beddir/${mark}.bed.narrowPeak > $outdir/${mark}_overlaps.txt

Import peak read counts

datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies/correlation/all_marks/"
markList = c("H3K27ac_ENCFF044JNJ", "H3K27me3_ENCFF126QYP")
countsList = list()

for(i in 1:length(markList)){
  countsList[[i]] = read.csv(paste0(datadir, markList[i], "_overlaps.txt"), sep = "\t", header = FALSE)
}

Make normalized matrix

countsList_copy = countsList

names = c("Abcam-ab177178", "Abcam-ab4729", "Active Motif", "Diagenode (1:100)", "Diagenode (1:50)", "H3K27me3", "ENCODE_H3K27ac", "ENCODE_H3K27me3", "ENCODE_H3K4me1", "ENCODE_H3K4me3", "ENCODE_H3K9ac", "ENCODE_H3K9me3", "ENCODE_H3K36me3", "ENCODE_DNase")

for(i in 1:length(countsList_copy)){
  countsList_copy[[i]] = countsList_copy[[i]][,11:24] %>% as.matrix() %>% preprocessCore::normalize.quantiles() %>% round() %>% cor(method = "pearson")
  colnames(countsList_copy[[i]]) = names
  rownames(countsList_copy[[i]]) = names
}

Correlation plots

col = colorRampPalette(brewer.pal(8, "PuOr"))(200)
col2 = c(col, col)
H3K27ac: Enhancers, promoters; permissive
H3K27me3: Promoters, gene-rich regions; repressing
H3K9me3: Satellite repeats, telomeres, pericentromeres; repressing
H3K9ac: Enhancers, promoters; permissive
H3K4me1: Enhancers; permissive
H3K4me3: Promoters; permissive
H3K36me3: Transcribed regions; permissive  
DNase: Open chromatin regions

With 500bp genome-wide bins (hg19)

datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies/correlation/all_marks/"
counts = read.csv(paste0(datadir, "hg19_500bp_overlaps_old.txt"), sep = "\t", header = FALSE)
counts_copy = counts

counts_copy = counts_copy[,4:17] %>% as.matrix() %>% preprocessCore::normalize.quantiles() %>% round() %>% cor(method = "pearson")
colnames(counts_copy) = names
rownames(counts_copy) = names
corrplot(counts_copy, method = "color", col = col, type = "upper", order = "original", tl.col = "black", tl.srt = 50, tl.cex = 0.5, cl.lim = c(0,1), addCoef.col = "black", number.cex = 0.5)

counts = counts_copy %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))

With ENCODE bed files

H3K27ac

corrplot(countsList_copy[[1]], method = "color", col = col2, type = "upper", order = "original", tl.col = "black", tl.srt = 50, tl.cex = 0.5, cl.lim = c(0,1), addCoef.col = "black", number.cex = 0.5)

counts = countsList_copy[[1]] %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))

H3K27me3

corrplot(countsList_copy[[2]], method = "color", col = col2, type = "upper", order = "original", tl.col = "black", tl.srt = 50, tl.cex = 0.5, cl.lim = c(0,1), addCoef.col = "black", number.cex = 0.5)

counts = countsList_copy[[2]] %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))

With merged sample peaks

datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies/correlation/all_marks/"
markList = c("SEACR_noDup", "MACS2_noDup")
countsList = list()

for(i in 1:length(markList)){
  countsList[[i]] = read.csv(paste0(datadir, markList[i], "_overlaps.txt"), sep = "\t", header = FALSE)
}
countsList_copy = countsList

for(i in 1:length(countsList_copy)){
  countsList_copy[[i]] = countsList_copy[[i]][,4:17] %>% as.matrix() %>% preprocessCore::normalize.quantiles() %>% round() %>% cor(method = "pearson")
  colnames(countsList_copy[[i]]) = names
  rownames(countsList_copy[[i]]) = names
}

SEACR

counts = countsList_copy[[1]] %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))

MACS2

counts = countsList_copy[[2]] %>% round(3)
heatmap.2(x = counts, col = col, trace = "none", density.info = "none", cexRow = 1, cexCol = 1, cellnote = counts, notecex = 1, notecol = "black", lhei = c(1, 10), lwid = c(1, 7), margins = c(15, 15))

ATAC-seq

ATAC-seq libraries: ENCLB918NXF, ENCLB758GEG from https://www.encodeproject.org/experiments/ENCSR483RKN/

peakdir = "/rds/general/user/la420/home/CUTnTAG/ATACseq/nfcore/results/bwa/mergedReplicate/macs/narrowPeak"
ATAC_path = file.path(peakdir, "K562_ATACseq.mRp.clN_peaks.narrowPeak")
ATAC = ChIPseeker::readPeakFile(ATAC_path, as = "GRanges") %>% BRGenomics::tidyChromosomes(keep.X = TRUE, keep.Y = TRUE)

extraCols_narrowPeak = c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")
ENCODE_H3K27ac = rtracklayer::import("https://www.encodeproject.org/files/ENCFF044JNJ/@@download/ENCFF044JNJ.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)
overlaps_ATAC_ENCODE = GenomicRanges::countOverlaps(query = ATAC, subject = ENCODE_H3K27ac)
overlaps_ENCODE_ATAC = GenomicRanges::countOverlaps(query = ENCODE_H3K27ac, subject = ATAC)

length(overlaps_ATAC_ENCODE[overlaps_ATAC_ENCODE != 0])
## [1] 26673
length(overlaps_ENCODE_ATAC[overlaps_ENCODE_ATAC != 0])
## [1] 35510
length(ATAC)
## [1] 85342
length(ENCODE_H3K27ac)
## [1] 51176

C&T fragments in ENCODE peaks

datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")

Total_ENCODE_inPeakData = c()
peakRes = data.frame(ENCODE_H3K27ac)

for(sample in sampleList){
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
  Total_ENCODE_inPeakN = counts(fragment_counts)[,1] %>% sum
  Total_ENCODE_inPeakData = rbind(Total_ENCODE_inPeakData, data.frame(Antibody = sample, Total_ENCODE_inPeakN = Total_ENCODE_inPeakN))
}

Total_ENCODE_inPeakData

ENCODE fragments in ENCODE peaks

datadir_ENCODE = "/rds/general/user/la420/home/CUTnTAG/antibodies"
ENCODE_sampleList = c("H3K27ac_ENCFF384ZZM", "H3K27me3_ENCFF676ORH")

Total_ENCODE_inPeakData = c()
peakRes = data.frame(ENCODE_H3K27ac)

for(sample in sampleList){
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = FALSE, by_rg = FALSE, format = "bam")
  Total_ENCODE_inPeakN = counts(fragment_counts)[,1] %>% sum
  Total_ENCODE_inPeakData = rbind(Total_ENCODE_inPeakData, data.frame(Antibody = sample, Total_ENCODE_inPeakN = Total_ENCODE_inPeakN))
}

Total_ENCODE_inPeakData

C&T fragments in ATAC peaks

datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")
sampleList = c("H3K27me3")

Total_ATAC_inPeakData = c()
peakRes = data.frame(ATAC)

for(sample in sampleList){
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
  Total_ATAC_inPeakN = counts(fragment_counts)[,1] %>% sum
  Total_ATAC_inPeakData = rbind(Total_ATAC_inPeakData, data.frame(Antibody = sample, Total_ATAC_inPeakN = Total_ATAC_inPeakN))
}

Total_ATAC_inPeakData

C&T fragments in ATAC-only peaks

ATAC_not_ENCODE = IRanges::subsetByOverlaps(ATAC, ENCODE_H3K27ac, invert = TRUE)
datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")

ATAC_inPeakData = c()
peakRes = data.frame(ATAC_not_ENCODE)

for(sample in sampleList){
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
  ATAC_inPeakN = counts(fragment_counts)[,1] %>% sum
  ATAC_inPeakData = rbind(ATAC_inPeakData, data.frame(Antibody = sample, ATAC_inPeakN = ATAC_inPeakN))
}

ATAC_inPeakData

C&T fragments in ENCODE-only peaks

ENCODE_not_ATAC = IRanges::subsetByOverlaps(ENCODE_H3K27ac, ATAC, invert = TRUE)
datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
sampleList = c("ActiveMotif", "Diagenode_100x", "Diagenode_50x", "Abcam-ab177178", "Abcam-ab4729", "H3K27me3")

ENCODE_inPeakData = c()
peakRes = data.frame(ENCODE_not_ATAC)

for(sample in sampleList){
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
  bamFile = paste0(datadir, "/bam/", sample, "_bowtie2_rmDup.mapped.bam")
  fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = TRUE, by_rg = FALSE, format = "bam")
  ENCODE_inPeakN = counts(fragment_counts)[,1] %>% sum
  ENCODE_inPeakData = rbind(ENCODE_inPeakData, data.frame(Antibody = sample, ENCODE_inPeakN = ENCODE_inPeakN))
}

ENCODE_inPeakData

ENCODE fragments in ATAC-only peaks

datadir = "/rds/general/user/la420/home/CUTnTAG/antibodies"
ENCODE_sampleList = c("H3K27ac_ENCFF384ZZM", "H3K27me3_ENCFF676ORH")

peakRes = data.frame(ATAC_not_ENCODE)

ENCODE_ATAC_inPeakData = c()
for(sample in ENCODE_sampleList){
  peak.gr = GenomicRanges::GRanges(seqnames = peakRes$seqnames, IRanges(start = peakRes$start, end = peakRes$end), strand = "*")
  bamFile = paste0(datadir, "/correlation/ENCODE_peak_files/bam/", sample, ".bam")
  fragment_counts = chromVAR::getCounts(bamFile, peak.gr, paired = FALSE, by_rg = FALSE, format = "bam")
  ENCODE_ATAC_inPeakN = counts(fragment_counts)[,1] %>% sum
  ENCODE_ATAC_inPeakData = rbind(ENCODE_ATAC_inPeakData, data.frame(Antibody = sample, ENCODE_ATAC_inPeakN = ENCODE_ATAC_inPeakN))
}

ENCODE_total_frags = c(9770603, 12064137)
ENCODE_ATAC_inPeakData$UniqueFragNum = ENCODE_total_frags
ENCODE_ATAC_inPeakData$Percentage = ENCODE_ATAC_inPeakData$ENCODE_ATAC_inPeakN / ENCODE_ATAC_inPeakData$UniqueFragNum * 100

ENCODE_ATAC_inPeakData

Summary

## summarize the duplication information from the picard summary outputs
picarddir = "/rds/general/user/la420/home/CUTnTAG/antibodies/picard_summary/"

dupResult = c()
for(sample in sampleList){
  dupRes = read.table(paste0(picarddir, sample, "_picard.rmDup.txt"), header = TRUE, fill = TRUE)
  dupResult = data.frame(Antibody = sample, MappedFragNum_hg19 = dupRes$READ_PAIRS_EXAMINED[1] %>% as.character %>% as.numeric, DuplicationRate = dupRes$PERCENT_DUPLICATION[1] %>% as.character %>% as.numeric * 100) %>% mutate(UniqueFragNum = (MappedFragNum_hg19 * (1-DuplicationRate/100)) %>% round()) %>% rbind(dupResult, .)
}

dupResult = dupResult[c("Antibody", "UniqueFragNum")]
dupResult
peakData = dupResult %>% left_join(Total_ENCODE_inPeakData, by = "Antibody") %>% left_join(ENCODE_inPeakData, by = "Antibody") %>% left_join(Total_ATAC_inPeakData, by = "Antibody") %>% left_join(ATAC_inPeakData, by = "Antibody")

peakData$Percentage_ATAC = peakData$Total_ATAC_inPeakN / peakData$UniqueFragNum * 100
peakData$Percentage_ENCODE = peakData$Total_ENCODE_inPeakN / peakData$UniqueFragNum * 100
peakData$Percentage_ATAC_only = peakData$ENCODE_inPeakN / peakData$UniqueFragNum * 100
peakData$Percentage_ENCODE_only = peakData$ATAC_inPeakN / peakData$UniqueFragNum * 100

peakData = peakData %>% dplyr::select(Antibody, UniqueFragNum, Total_ENCODE_inPeakN, ENCODE_inPeakN, Total_ATAC_inPeakN, ATAC_inPeakN, Percentage_ENCODE, Percentage_ENCODE_only, Percentage_ATAC, Percentage_ATAC_only)

colnames(peakData) = c("Antibody", "Unique Fragments", "Fragments in ENCODE H3K27ac peaks", "Fragments in ENCODE H3K27ac peaks (exc ATAC)", "Fragments in ATAC peaks", "Fragments in ATAC peaks (exc ENCODE)", "% of fragments in ENCODE H3K27ac (total)", "% of fragments in ENCODE H3K27ac (exc. ATAC)", "% of fragments in ATAC (total)", "% of fragments in ATAC (exc. ENCODE)")
peakData
ATAC_peakData = peakData[c("Antibody", "% of fragments in ENCODE H3K27ac (total)", "% of fragments in ATAC (total)", "% of fragments in ENCODE H3K27ac (exc. ATAC)", "% of fragments in ATAC (exc. ENCODE)")]

ATAC_peakData_melt = melt(ATAC_peakData, id.vars = c("Antibody"))

ENCODE_ATAC_plot = ggplot(ATAC_peakData_melt, aes(Antibody, value)) +
  geom_bar(aes(fill = variable), 
  width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +  
  ylim(0, 40) +
  ylab("% of fragments in peaks") +
  xlab("Antibody") +
  ggtitle("Fragments in ENCODE H3K27ac vs ATAC peaks") +
  theme_bw()

ENCODE_ATAC_plot

ENCODE fragments in ATAC-only peaks

ENCODE_in_ATAC_plot = ggplot(ENCODE_ATAC_inPeakData, aes(Antibody, Percentage)) +
  geom_bar(aes(fill = Antibody), 
  width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +  
  ylim(0, 40) +
  ylab("% of fragments in peaks") +
  xlab("Antibody") +
  ggtitle("ENCODE fragments in ATAC-only peaks") +
  theme_bw()

ENCODE_in_ATAC_plot

Peak calling with IgG control

Peak numbers

peakdir_IgG = "/rds/general/user/la420/home/CUTnTAG/IgG/peakCalling/all_peaks/"
peakdir_no_IgG = "/rds/general/user/la420/home/CUTnTAG/antibodies/all_peaks/"
sampleList = c("ActiveMotif_SEACR", "ActiveMotif_MACS2", "Diagenode_100x_SEACR", "Diagenode_100x_MACS2", "Diagenode_50x_SEACR", "Diagenode_50x_MACS2", "Abcam-ab177178_SEACR", "Abcam-ab177178_MACS2", "Abcam-ab4729_SEACR", "Abcam-ab4729_MACS2", "H3K27me3_SEACR", "H3K27me3_MACS2")

Total_CT_peaks = c()

for(sample in sampleList){
    peakInfo_IgG = read.table(paste0(peakdir_IgG, sample, "_IgG.bed"), header = FALSE, fill = TRUE)
    peakInfo_no_IgG = read.table(paste0(peakdir_no_IgG, sample, ".bed"), header = FALSE, fill = TRUE)
    Total_CT_peaks = data.frame(Antibody = sample, Total_peaks_with_IgG = nrow(peakInfo_IgG), Total_peaks_without_IgG = nrow(peakInfo_no_IgG)) %>% rbind(Total_CT_peaks, .)
}

Total_CT_peaks %>% dplyr::select(Antibody, Total_peaks_with_IgG, Total_peaks_without_IgG) 

ENCODE H3K27ac peak overlap

extraCols_narrowPeak = c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")

ENCODE_H3K27ac = rtracklayer::import("https://www.encodeproject.org/files/ENCFF044JNJ/@@download/ENCFF044JNJ.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)

ENCODE_H3K27me3 = rtracklayer::import("https://www.encodeproject.org/files/ENCFF126QYP/@@download/ENCFF126QYP.bed.gz", format = "BED", extraCols = extraCols_narrowPeak)

C&T peaks in ENCODE

ENCODE_H3K27ac_overlapping_peaks_noIgG = c()
ENCODE_H3K27ac_overlapping_peaks_IgG = c()

for(i in 1:length(peak_list_IgG)){
  overlaps_H3K27ac_noIgG = GenomicRanges::countOverlaps(query = peak_list_noIgG[[i]], subject = ENCODE_H3K27ac)
  overlaps_H3K27ac_IgG = GenomicRanges::countOverlaps(query = peak_list_IgG[[i]], subject = ENCODE_H3K27ac)
  
  CT_peaks_in_ENCODE_H3K27ac_noIgG = overlaps_H3K27ac_noIgG[overlaps_H3K27ac_noIgG != 0] %>% length()
  CT_peaks_in_ENCODE_H3K27ac_IgG = overlaps_H3K27ac_IgG[overlaps_H3K27ac_IgG != 0] %>% length()
  
  ENCODE_H3K27ac_overlapping_peaks_noIgG = c(ENCODE_H3K27ac_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27ac_noIgG)
  ENCODE_H3K27ac_overlapping_peaks_IgG = c(ENCODE_H3K27ac_overlapping_peaks_IgG, CT_peaks_in_ENCODE_H3K27ac_IgG)
}

Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, CT_peaks_in_ENCODE_H3K27ac_noIgG = ENCODE_H3K27ac_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27ac_IgG = ENCODE_H3K27ac_overlapping_peaks_IgG)
Total_CT_peaks_falling_into_ENCODE

ENCODE peaks in C&T

captured_H3K27ac_ENCODE_list_noIgG = c()
captured_H3K27ac_ENCODE_list_IgG = c()

for(i in 1:length(peak_list_IgG)){
  overlaps_H3K27ac_noIgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27ac, subject = peak_list_noIgG[[i]])
  overlaps_H3K27ac_IgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27ac, subject = peak_list_IgG[[i]])
  
  ENCODE_H3K27ac_captured_peaks_noIgG = overlaps_H3K27ac_noIgG[overlaps_H3K27ac_noIgG != 0] %>% length()
  ENCODE_H3K27ac_captured_peaks_IgG = overlaps_H3K27ac_IgG[overlaps_H3K27ac_IgG != 0] %>% length()
  
  captured_H3K27ac_ENCODE_list_noIgG = c(captured_H3K27ac_ENCODE_list_noIgG, ENCODE_H3K27ac_captured_peaks_noIgG)
  captured_H3K27ac_ENCODE_list_IgG = c(captured_H3K27ac_ENCODE_list_IgG, ENCODE_H3K27ac_captured_peaks_IgG)
}

Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, ENCODE_H3K27ac_captured_peaks_noIgG = captured_H3K27ac_ENCODE_list_noIgG, ENCODE_H3K27ac_captured_peaks_IgG = captured_H3K27ac_ENCODE_list_IgG)
Total_captured_ENCODE_peaks

Summary

ENCODE_overlaps_H3K27ac = Total_CT_peaks %>% left_join(Total_captured_ENCODE_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")

ENCODE_overlaps_H3K27ac$Percentage_of_total_CT_IgG = ENCODE_overlaps_H3K27ac$CT_peaks_in_ENCODE_H3K27ac_IgG / ENCODE_overlaps_H3K27ac$Total_peaks_with_IgG * 100
ENCODE_overlaps_H3K27ac$Percentage_of_total_CT_noIgG = ENCODE_overlaps_H3K27ac$CT_peaks_in_ENCODE_H3K27ac_noIgG / ENCODE_overlaps_H3K27ac$Total_peaks_without_IgG * 100

ENCODE_overlaps_H3K27ac$Percentage_of_total_ENCODE_IgG = ENCODE_overlaps_H3K27ac$ENCODE_H3K27ac_captured_peaks_IgG / length(ENCODE_H3K27ac) * 100
ENCODE_overlaps_H3K27ac$Percentage_of_total_ENCODE_noIgG = ENCODE_overlaps_H3K27ac$ENCODE_H3K27ac_captured_peaks_noIgG / length(ENCODE_H3K27ac) * 100

ENCODE_overlaps_H3K27ac = ENCODE_overlaps_H3K27ac[c("Antibody", "Total_peaks_with_IgG", "Total_peaks_without_IgG", "CT_peaks_in_ENCODE_H3K27ac_IgG", "Percentage_of_total_CT_IgG", "CT_peaks_in_ENCODE_H3K27ac_noIgG", "Percentage_of_total_CT_noIgG", "ENCODE_H3K27ac_captured_peaks_IgG", "Percentage_of_total_ENCODE_IgG", "ENCODE_H3K27ac_captured_peaks_noIgG", "Percentage_of_total_ENCODE_noIgG")]
ENCODE_overlaps_H3K27ac

Proportion of ENCODE peaks captured by C&T (chart)

ENCODE_overlaps_H3K27ac_perc = ENCODE_overlaps_H3K27ac[c("Antibody", "Percentage_of_total_ENCODE_noIgG", "Percentage_of_total_ENCODE_IgG")]
colnames(ENCODE_overlaps_H3K27ac_perc) = c("Antibody", "no IgG", "IgG")
ENCODE_overlaps_H3K27ac_melt = melt(ENCODE_overlaps_H3K27ac_perc, id.vars = c("Antibody"))

ENCODE_overlap_plot = ggplot(ENCODE_overlaps_H3K27ac_melt, aes(Antibody, value)) +
  geom_bar(aes(fill = variable), 
  width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +  
  ylim(0, 100) +
  ylab("% of ENCODE peaks captured") +
  xlab("Antibody") +
  ggtitle("ENCODE peaks captured by C&T (H3K27ac)") +
  theme_bw()

ENCODE_overlap_plot

Proportion of C&T peaks falling into ENCODE (chart)

CT_in_ENCODE_H3K27ac_perc = ENCODE_overlaps_H3K27ac[c("Antibody", "Percentage_of_total_CT_noIgG", "Percentage_of_total_CT_IgG")]
colnames(CT_in_ENCODE_H3K27ac_perc) = c("Antibody", "no IgG", "IgG")
CT_in_ENCODE_H3K27ac_melt = melt(CT_in_ENCODE_H3K27ac_perc, id.vars = c("Antibody"))

ENCODE_overlap_plot = ggplot(CT_in_ENCODE_H3K27ac_melt, aes(Antibody, value)) +
  geom_bar(aes(fill = variable), 
  width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +  
  ylim(0, 100) +
  ylab("% of sample peaks in ENCODE") +
  xlab("Antibody") +
  ggtitle("C&T peaks falling into ENCODE (H3K27ac)") +
  theme_bw()

ENCODE_overlap_plot

ENCODE H3K27me3 peak overlap

C&T peaks in ENCODE

ENCODE_H3K27me3_overlapping_peaks_noIgG = c()
ENCODE_H3K27me3_overlapping_peaks_IgG = c()

for(i in 1:length(peak_list_IgG)){
  overlaps_H3K27me3_noIgG = GenomicRanges::countOverlaps(query = peak_list_noIgG[[i]], subject = ENCODE_H3K27me3)
  overlaps_H3K27me3_IgG = GenomicRanges::countOverlaps(query = peak_list_IgG[[i]], subject = ENCODE_H3K27me3)
  
  CT_peaks_in_ENCODE_H3K27me3_noIgG = overlaps_H3K27me3_noIgG[overlaps_H3K27me3_noIgG != 0] %>% length()
  CT_peaks_in_ENCODE_H3K27me3_IgG = overlaps_H3K27me3_IgG[overlaps_H3K27me3_IgG != 0] %>% length()
  
  ENCODE_H3K27me3_overlapping_peaks_noIgG = c(ENCODE_H3K27me3_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27me3_noIgG)
  ENCODE_H3K27me3_overlapping_peaks_IgG = c(ENCODE_H3K27me3_overlapping_peaks_IgG, CT_peaks_in_ENCODE_H3K27me3_IgG)
}

Total_CT_peaks_falling_into_ENCODE = data.frame(Antibody = sampleList, CT_peaks_in_ENCODE_H3K27me3_noIgG = ENCODE_H3K27me3_overlapping_peaks_noIgG, CT_peaks_in_ENCODE_H3K27me3_IgG = ENCODE_H3K27me3_overlapping_peaks_IgG)
Total_CT_peaks_falling_into_ENCODE

ENCODE peaks in C&T

captured_H3K27me3_ENCODE_list_noIgG = c()
captured_H3K27me3_ENCODE_list_IgG = c()

for(i in 1:length(peak_list_IgG)){
  overlaps_H3K27me3_noIgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27me3, subject = peak_list_noIgG[[i]])
  overlaps_H3K27me3_IgG = GenomicRanges::countOverlaps(query = ENCODE_H3K27me3, subject = peak_list_IgG[[i]])
  
  ENCODE_H3K27me3_captured_peaks_noIgG = overlaps_H3K27me3_noIgG[overlaps_H3K27me3_noIgG != 0] %>% length()
  ENCODE_H3K27me3_captured_peaks_IgG = overlaps_H3K27me3_IgG[overlaps_H3K27me3_IgG != 0] %>% length()
  
  captured_H3K27me3_ENCODE_list_noIgG = c(captured_H3K27me3_ENCODE_list_noIgG, ENCODE_H3K27me3_captured_peaks_noIgG)
  captured_H3K27me3_ENCODE_list_IgG = c(captured_H3K27me3_ENCODE_list_IgG, ENCODE_H3K27me3_captured_peaks_IgG)
}

Total_captured_ENCODE_peaks = data.frame(Antibody = sampleList, ENCODE_H3K27me3_captured_peaks_noIgG = captured_H3K27me3_ENCODE_list_noIgG, ENCODE_H3K27me3_captured_peaks_IgG = captured_H3K27me3_ENCODE_list_IgG)
Total_captured_ENCODE_peaks

Summary

ENCODE_overlaps_H3K27me3 = Total_CT_peaks %>% left_join(Total_captured_ENCODE_peaks, by = "Antibody") %>% left_join(Total_CT_peaks_falling_into_ENCODE, by = "Antibody")

ENCODE_overlaps_H3K27me3$Percentage_of_total_CT_IgG = ENCODE_overlaps_H3K27me3$CT_peaks_in_ENCODE_H3K27me3_IgG / ENCODE_overlaps_H3K27me3$Total_peaks_with_IgG * 100
ENCODE_overlaps_H3K27me3$Percentage_of_total_CT_noIgG = ENCODE_overlaps_H3K27me3$CT_peaks_in_ENCODE_H3K27me3_noIgG / ENCODE_overlaps_H3K27me3$Total_peaks_without_IgG * 100

ENCODE_overlaps_H3K27me3$Percentage_of_total_ENCODE_IgG = ENCODE_overlaps_H3K27me3$ENCODE_H3K27me3_captured_peaks_IgG / length(ENCODE_H3K27me3) * 100
ENCODE_overlaps_H3K27me3$Percentage_of_total_ENCODE_noIgG = ENCODE_overlaps_H3K27me3$ENCODE_H3K27me3_captured_peaks_noIgG / length(ENCODE_H3K27me3) * 100

ENCODE_overlaps_H3K27me3 = ENCODE_overlaps_H3K27me3[c("Antibody", "Total_peaks_with_IgG", "Total_peaks_without_IgG", "CT_peaks_in_ENCODE_H3K27me3_IgG", "Percentage_of_total_CT_IgG", "CT_peaks_in_ENCODE_H3K27me3_noIgG", "Percentage_of_total_CT_noIgG", "ENCODE_H3K27me3_captured_peaks_IgG", "Percentage_of_total_ENCODE_IgG", "ENCODE_H3K27me3_captured_peaks_noIgG", "Percentage_of_total_ENCODE_noIgG")]
ENCODE_overlaps_H3K27me3

Proportion of ENCODE peaks captured by C&T (chart)

ENCODE_overlaps_H3K27me3_perc = ENCODE_overlaps_H3K27me3[c("Antibody", "Percentage_of_total_ENCODE_noIgG", "Percentage_of_total_ENCODE_IgG")]
colnames(ENCODE_overlaps_H3K27me3_perc) = c("Antibody", "no IgG", "IgG")
ENCODE_overlaps_H3K27me3_melt = melt(ENCODE_overlaps_H3K27me3_perc, id.vars = c("Antibody"))

ENCODE_overlap_plot = ggplot(ENCODE_overlaps_H3K27me3_melt, aes(Antibody, value)) +
  geom_bar(aes(fill = variable), 
  width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +  
  ylim(0, 100) +
  ylab("% of ENCODE peaks captured") +
  xlab("Antibody") +
  ggtitle("ENCODE peaks captured by C&T (H3K27me3)") +
  theme_bw()

ENCODE_overlap_plot

Proportion of C&T peaks falling into ENCODE (chart)

CT_in_ENCODE_H3K27me3_perc = ENCODE_overlaps_H3K27me3[c("Antibody", "Percentage_of_total_CT_noIgG", "Percentage_of_total_CT_IgG")]
colnames(CT_in_ENCODE_H3K27me3_perc) = c("Antibody", "no IgG", "IgG")
CT_in_ENCODE_H3K27me3_melt = melt(CT_in_ENCODE_H3K27me3_perc, id.vars = c("Antibody"))

ENCODE_overlap_plot = ggplot(CT_in_ENCODE_H3K27me3_melt, aes(Antibody, value)) +
  geom_bar(aes(fill = variable), 
  width = 0.7, position = position_dodge(width = 0.75), stat = "identity") +  
  ylim(0, 100) +
  ylab("% of sample peaks in ENCODE") +
  xlab("Antibody") +
  ggtitle("C&T peaks falling into ENCODE (H3K27me3)") +
  theme_bw()

ENCODE_overlap_plot

Annotation

ChromHMM

chrHMM.url = "http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeBroadHmm/wgEncodeBroadHmmK562HMM.bed.gz"
chrHMM = genomation::readBed(chrHMM.url)
chrHMM.list = GenomicRanges::split(chrHMM, chrHMM$name, drop = TRUE)

Without IgG

peak2ann.l = genomation::annotateWithFeatures(peak_list_noIgG, chrHMM.list)
genomation::heatTargetAnnotation(peak2ann.l)

With IgG

peak2ann.l = genomation::annotateWithFeatures(peak_list_IgG, chrHMM.list)
genomation::heatTargetAnnotation(peak2ann.l)

ChIPseeker

Without IgG

peakAnnoList_IgG = lapply(peak_list_IgG, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList_IgG, title = "No IgG")

With IgG

peakAnnoList_noIgG = lapply(peak_list_noIgG, annotatePeak, TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene, tssRegion = c(-3000, 3000), verbose = FALSE)
ChIPseeker::plotAnnoBar(peakAnnoList_noIgG, title = "With IgG")